#install.packages("vegan")
pacman::p_load(tidyverse, devtools, phyloseq, patchwork, vegan,
install = FALSE)
# Load colors
n_colors <- c(
"0" = "#D9CC3C",
"150" = "#A0E0BA",
"300" = "coral2")
growth_stage_colors <- c (
"30" = "lightblue",
"75" = "darkblue")
planted_colors <- c (
"Unplanted" = "springgreen",
"Planted" = "purple")# Load in rooted phylogenetic tree!
load("data/03_Phylogenetic_Tree/phytree_preprocessed_physeq.RData")
midroot_physeq_rm18392## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 31491 taxa and 35 samples ]
## sample_data() Sample Data: [ 35 samples by 12 sample variables ]
## tax_table() Taxonomy Table: [ 31491 taxa by 9 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 31491 tips and 31490 internal nodes ]
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 31491 taxa and 35 samples ]
## sample_data() Sample Data: [ 35 samples by 12 sample variables ]
## tax_table() Taxonomy Table: [ 31491 taxa by 9 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 31491 tips and 31489 internal nodes ]
# Calculate the total number of reads per sample.
raw_TotalSeqs_df <-
midroot_physeq_rm18392 %>%
# calculate the sample read sums
sample_sums() %>%
data.frame()
# name the column
colnames(raw_TotalSeqs_df)[1] <- "TotalSeqs"
head(raw_TotalSeqs_df)## TotalSeqs
## SRR21916202 29262
## SRR21916203 38724
## SRR21916205 42736
## SRR21916206 37619
## SRR21916207 35134
## SRR21916208 30827
# make histogram of raw reads
raw_TotalSeqs_df %>%
ggplot(aes(x = TotalSeqs)) +
geom_histogram(bins = 50 ) +
scale_x_continuous(limits = c(0, 30000)) +
labs(title = "Raw Sequencing Depth Distribution") +
theme_classic()## Warning: Removed 23 rows containing non-finite outside the scale range
## (`stat_bin()`).
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_bar()`).
raw_rooted_physeq <-
midroot_physeq_rm18392 %>%
# remove lowly seq sample that was outlier in alpha diversity analysis
subset_samples(names != "SRR21916204") %>%
# any asvs unique to this sample will also be removed
prune_taxa(taxa_sums(.) > 0, .)
# Inspect
raw_rooted_physeq## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 31491 taxa and 35 samples ]
## sample_data() Sample Data: [ 35 samples by 12 sample variables ]
## tax_table() Taxonomy Table: [ 31491 taxa by 9 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 31491 tips and 31490 internal nodes ]
## [1] 17544
### scale_reads function
####################################################################################
# Function to scale reads: http://deneflab.github.io/MicrobeMiseq/
# Scales reads by
# 1) taking proportions
# 2) multiplying by a given library size of n
# 3) rounding
# Default for n is the minimum sample size in your library
# Default for round is floor
matround <- function(x){trunc(x+0.5)}
scale_reads <- function(physeq, n = min(sample_sums(physeq)), round = "round") {
# transform counts to n
physeq.scale <- transform_sample_counts(physeq, function(x) {(n * x/sum(x))})
# Pick the rounding functions
if (round == "floor"){
otu_table(physeq.scale) <- floor(otu_table(physeq.scale))
} else if (round == "round"){
otu_table(physeq.scale) <- round(otu_table(physeq.scale))
} else if (round == "matround"){
otu_table(physeq.scale) <- matround(otu_table(physeq.scale))
}
# Prune taxa and return new phyloseq object
physeq.scale <- prune_taxa(taxa_sums(physeq.scale) > 0, physeq.scale)
return(physeq.scale)
}This is where one might decide use rarefaction to normalize data.
## [1] 17544
# Scale reads by the above function
scaled_rooted_physeq <-
raw_rooted_physeq %>%
scale_reads(round = "matround")
# Calculate the read depth
scaled_TotalSeqs_df <-
scaled_rooted_physeq %>%
sample_sums() %>%
data.frame()
colnames(scaled_TotalSeqs_df)[1] <-"TotalSeqs"
# Inspect
head(scaled_TotalSeqs_df)## TotalSeqs
## SRR21916202 17546
## SRR21916203 17459
## SRR21916205 17522
## SRR21916206 17387
## SRR21916207 17027
## SRR21916208 17618
## [1] 17027
## [1] 17869
## [1] 842
# Plot Histogram
scaled_TotalSeqs_df %>%
ggplot(aes(x = TotalSeqs)) +
geom_histogram(bins = 50) +
scale_x_continuous(limits = c(0, 30000)) +
labs(title = "Scaled Sequencing Depth at 2194") +
theme_classic()## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_bar()`).
Exploratory analyses from Paliy & Shankar (2016) paper, which is using unconstrained ordination methods like PCoA.
# Calculate sorensen dissimilarity: Abundance-unweighted of shared taxa
scaled_soren_pcoa <-
ordinate(
physeq = scaled_rooted_physeq,
method = "PCoA",
distance = "bray", binary = TRUE)
#str(scaled_soren_pcoa)
# Plot the ordination
n_colors <- as.factor(n_colors)
soren_n_pcoa <- plot_ordination(
physeq = scaled_rooted_physeq,
ordination = scaled_soren_pcoa,
color = as.factor("n_level"),
title = "Sorensen PCoA") +
geom_point(size=5, alpha = 0.5, aes(color = as.factor(n_level))) +
scale_color_manual(values = as.factor(n_colors)) +
theme_bw()
# Show the plot
soren_n_pcoa# Plot for comparison between planted vs. unplanted status
# Plot the ordination
planted_colors <- as.factor(planted_colors)
soren_planted_pcoa <- plot_ordination(
physeq = scaled_rooted_physeq,
ordination = scaled_soren_pcoa,
color = as.factor("planted"),
title = "Sorensen PCoA") +
geom_point(size=5, alpha = 0.5, aes(color = as.factor(planted))) +
scale_color_manual(values = as.factor(planted_colors)) +
theme_bw()
# Show the plot
soren_planted_pcoa
Much clearer presence/absence separation based on planted vs. unplanted
status.
# Calculate the BC distance
scaled_BC_pcoa <-
ordinate(
physeq = scaled_rooted_physeq,
method = "PCoA",
distance = "bray")
# Plot the PCoA
bray_n_pcoa <- plot_ordination(
physeq = scaled_rooted_physeq,
ordination = scaled_BC_pcoa,
color = as.factor("n_level"),
title = "Bray-Curtis PCoA") +
geom_point(size=5, alpha = 0.5, aes(color = as.factor(n_level))) +
scale_color_manual(values = as.factor(n_colors)) +
theme_bw()
# Show the plot
bray_n_pcoa# Plot for comparison between planted vs. unplanted status
# Plot the PCoA
bray_planted_pcoa <- plot_ordination(
physeq = scaled_rooted_physeq,
ordination = scaled_BC_pcoa,
color = as.factor("planted"),
title = "Bray-Curtis PCoA") +
geom_point(size=5, alpha = 0.5, aes(color = as.factor(planted))) +
scale_color_manual(values = as.factor(planted_colors)) +
theme_bw()
# Show the plot
bray_planted_pcoa
More variance can be explained by the axis of the abundance-weighted
ordination: 20% for Bray-Curtis vs. only 10.3% for Sorensen ordination.
Clear separations based on n-level are not evident, but separations
based on plant presence are!
# Calculate the BC distance
scaled_wUNI_pcoa <-
ordinate(
physeq = scaled_rooted_physeq,
method = "PCoA",
distance = "wunifrac")
# Plot the PCoA
wUNI_n_pcoa <-
plot_ordination(
physeq = scaled_rooted_physeq,
ordination = scaled_wUNI_pcoa,
color = as.factor("n_level"),
title = "Weighted Unifrac PCoA") +
geom_point(size=5, alpha = 0.5, aes(color = as.factor(n_level))) +
scale_color_manual(values = as.factor(n_colors)) +
theme_bw()
wUNI_n_pcoawUNI_planted_pcoa <-
plot_ordination(
physeq = scaled_rooted_physeq,
ordination = scaled_wUNI_pcoa,
color = as.factor("planted"),
title = "Weighted Unifrac PCoA") +
geom_point(size=5, alpha = 0.5, aes(color = as.factor(planted))) +
scale_color_manual(values = as.factor(planted_colors)) +
theme_bw()
wUNI_planted_pcoa
32.1% variation explained by the Weighted Unifrac PCoA
#Let’s plot all three together into one plot to have a concise visualization of the three metrics.
(soren_n_pcoa + theme(legend.position = "none")) +
(bray_n_pcoa + theme(legend.position = "none")) +
(wUNI_n_pcoa + theme(legend.position = "none"))(soren_planted_pcoa + theme(legend.position = "none")) +
(bray_planted_pcoa + theme(legend.position = "none")) +
(wUNI_planted_pcoa + theme(legend.position = "none"))Since we did 3 of the dissimilarity metrics for the PCoA, let’s just plot one example of them for the NMDS plotting. Here, we will use weighted Unifrac
# Calculate the Weighted Unifrac distance
scaled_wUNI_nmds <-
ordinate(
physeq = scaled_rooted_physeq,
method = "NMDS",
distance = "wunifrac")## Run 0 stress 0.1777991
## Run 1 stress 0.1828857
## Run 2 stress 0.1762887
## ... New best solution
## ... Procrustes: rmse 0.1202383 max resid 0.2898712
## Run 3 stress 0.1846833
## Run 4 stress 0.1730309
## ... New best solution
## ... Procrustes: rmse 0.1020346 max resid 0.2983786
## Run 5 stress 0.176289
## Run 6 stress 0.1822507
## Run 7 stress 0.1809346
## Run 8 stress 0.1740514
## Run 9 stress 0.1887748
## Run 10 stress 0.1802961
## Run 11 stress 0.1841897
## Run 12 stress 0.1732299
## ... Procrustes: rmse 0.1217492 max resid 0.5223252
## Run 13 stress 0.1729327
## ... New best solution
## ... Procrustes: rmse 0.03561431 max resid 0.1410735
## Run 14 stress 0.1790464
## Run 15 stress 0.1876594
## Run 16 stress 0.1890485
## Run 17 stress 0.1719995
## ... New best solution
## ... Procrustes: rmse 0.05067667 max resid 0.2404631
## Run 18 stress 0.182121
## Run 19 stress 0.178459
## Run 20 stress 0.1775848
## *** Best solution was not repeated -- monoMDS stopping criteria:
## 18: stress ratio > sratmax
## 2: scale factor of the gradient < sfgrmin
# Plot the PCoA
wUNI_n_nmds <-
plot_ordination(
physeq = scaled_rooted_physeq,
ordination = scaled_wUNI_nmds,
color = as.factor("n_level"),
title = "Weighted Unifrac NMDS") +
geom_point(size=5, alpha = 0.5, aes(color = as.factor(n_level))) +
scale_color_manual(values = as.factor(n_colors)) +
theme_bw()
wUNI_n_nmdswUNI_planted_nmds <-
plot_ordination(
physeq = scaled_rooted_physeq,
ordination = scaled_wUNI_nmds,
color = as.factor("planted"),
title = "Weighted Unifrac NMDS") +
geom_point(size=5, alpha = 0.5, aes(color = as.factor(planted))) +
scale_color_manual(values = as.factor(planted_colors)) +
theme_bw()
wUNI_planted_nmds(wUNI_planted_pcoa + theme(legend.position = "none")) +
(wUNI_planted_nmds + theme(legend.position = "none"))# Calculate all three of the distance matrices
scaled_sorensen_dist <- phyloseq::distance(scaled_rooted_physeq, method = "bray", binary = TRUE)
scaled_bray_dist <- phyloseq::distance(scaled_rooted_physeq, method = "bray")
scaled_wUnifrac_dist <- phyloseq::distance(scaled_rooted_physeq, method = "wunifrac")
# make a data frame from the sample_data
# All distance matrices will be the same metadata because they
# originate from the same phyloseq object.
metadata <- data.frame(sample_data(scaled_rooted_physeq))
# Adonis test
# In this example we are testing the hypothesis that the five stations
# that were collected have different centroids in the ordination space
# for each of the dissimilarity metrics, we are using a discrete variable
adonis2(scaled_sorensen_dist ~ n_level, data = metadata)## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = scaled_sorensen_dist ~ n_level, data = metadata)
## Df SumOfSqs R2 F Pr(>F)
## n_level 1 0.2398 0.03253 1.1095 0.031 *
## Residual 33 7.1334 0.96747
## Total 34 7.3732 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = scaled_bray_dist ~ n_level, data = metadata)
## Df SumOfSqs R2 F Pr(>F)
## n_level 1 0.1126 0.033 1.1261 0.191
## Residual 33 3.2989 0.967
## Total 34 3.4115 1.000
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = scaled_wUnifrac_dist ~ n_level, data = metadata)
## Df SumOfSqs R2 F Pr(>F)
## n_level 1 0.01372 0.02958 1.0059 0.372
## Residual 33 0.45014 0.97042
## Total 34 0.46386 1.00000
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = scaled_sorensen_dist ~ planted, data = metadata)
## Df SumOfSqs R2 F Pr(>F)
## planted 1 0.3654 0.04955 1.7205 0.001 ***
## Residual 33 7.0078 0.95045
## Total 34 7.3732 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = scaled_bray_dist ~ planted, data = metadata)
## Df SumOfSqs R2 F Pr(>F)
## planted 1 0.3631 0.10644 3.9308 0.001 ***
## Residual 33 3.0484 0.89356
## Total 34 3.4115 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = scaled_wUnifrac_dist ~ planted, data = metadata)
## Df SumOfSqs R2 F Pr(>F)
## planted 1 0.06930 0.14939 5.7958 0.001 ***
## Residual 33 0.39456 0.85061
## Total 34 0.46386 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
No significant diffrence between nitrogen levels
Differences between planted and unplanted treatments are significant. The most difference is explained by the Weighted Unifrac.
# We might also care about other variables
# Here, we will add date and fraction as variables
# multiplicative model ORDER MATTERS!
adonis2(scaled_sorensen_dist ~ n_level * growth_stage * planted, data = metadata)## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = scaled_sorensen_dist ~ n_level * growth_stage * planted, data = metadata)
## Df SumOfSqs R2 F Pr(>F)
## n_level 1 0.2398 0.03253 1.1499 0.011 *
## growth_stage 1 0.2316 0.03142 1.1106 0.039 *
## planted 1 0.3648 0.04948 1.7492 0.001 ***
## n_level:growth_stage 1 0.2242 0.03041 1.0751 0.088 .
## n_level:planted 1 0.2233 0.03029 1.0708 0.104
## growth_stage:planted 1 0.2378 0.03226 1.1403 0.022 *
## n_level:growth_stage:planted 1 0.2202 0.02987 1.0560 0.151
## Residual 27 5.6313 0.76375
## Total 34 7.3732 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = scaled_bray_dist ~ n_level * growth_stage * planted, data = metadata)
## Df SumOfSqs R2 F Pr(>F)
## n_level 1 0.1126 0.03300 1.2926 0.055 .
## growth_stage 1 0.1308 0.03835 1.5023 0.018 *
## planted 1 0.3651 0.10701 4.1915 0.001 ***
## n_level:growth_stage 1 0.1008 0.02954 1.1572 0.160
## n_level:planted 1 0.1138 0.03336 1.3066 0.053 .
## growth_stage:planted 1 0.1346 0.03945 1.5452 0.012 *
## n_level:growth_stage:planted 1 0.1023 0.02999 1.1749 0.124
## Residual 27 2.3516 0.68930
## Total 34 3.4115 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Note that the ORDER MATTERS!
adonis2(scaled_wUnifrac_dist ~ n_level * growth_stage * planted, data = metadata)## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = scaled_wUnifrac_dist ~ n_level * growth_stage * planted, data = metadata)
## Df SumOfSqs R2 F Pr(>F)
## n_level 1 0.01372 0.02958 1.2941 0.171
## growth_stage 1 0.03026 0.06524 2.8539 0.003 **
## planted 1 0.07006 0.15103 6.6069 0.001 ***
## n_level:growth_stage 1 0.01150 0.02480 1.0849 0.295
## n_level:planted 1 0.01511 0.03257 1.4250 0.108
## growth_stage:planted 1 0.02371 0.05111 2.2361 0.011 *
## n_level:growth_stage:planted 1 0.01321 0.02848 1.2458 0.165
## Residual 27 0.28629 0.61719
## Total 34 0.46386 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = scaled_wUnifrac_dist ~ growth_stage * n_level * planted, data = metadata)
## Df SumOfSqs R2 F Pr(>F)
## growth_stage 1 0.03057 0.06591 2.8834 0.002 **
## n_level 1 0.01341 0.02891 1.2646 0.171
## planted 1 0.07006 0.15103 6.6069 0.001 ***
## growth_stage:n_level 1 0.01150 0.02480 1.0849 0.321
## growth_stage:planted 1 0.02447 0.05276 2.3080 0.007 **
## n_level:planted 1 0.01435 0.03093 1.3531 0.133
## growth_stage:n_level:planted 1 0.01321 0.02848 1.2458 0.197
## Residual 27 0.28629 0.61719
## Total 34 0.46386 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
The PERMANOVA is sensitive to variance/dispersion in the data. Therefore, we need to run a homogeneity of dispersion test to test for the sensitivity of our PERMANOVA results to variance.
# Homogeneity of Dispersion test with beta dispr
# Sorensen
beta_soren_n <- betadisper(scaled_sorensen_dist, metadata$n_level)
permutest(beta_soren_n)##
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
##
## Response: Distances
## Df Sum Sq Mean Sq F N.Perm Pr(>F)
## Groups 2 0.0068845 0.0034422 6.1052 999 0.002 **
## Residuals 32 0.0180421 0.0005638
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
beta_soren_planted <- betadisper(scaled_sorensen_dist, metadata$planted)
permutest(beta_soren_planted)##
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
##
## Response: Distances
## Df Sum Sq Mean Sq F N.Perm Pr(>F)
## Groups 1 0.0000275 0.00002755 0.0309 999 0.868
## Residuals 33 0.0293858 0.00089048
##
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
##
## Response: Distances
## Df Sum Sq Mean Sq F N.Perm Pr(>F)
## Groups 2 0.006049 0.0030245 2.6353 999 0.08 .
## Residuals 32 0.036727 0.0011477
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
##
## Response: Distances
## Df Sum Sq Mean Sq F N.Perm Pr(>F)
## Groups 1 0.000271 0.00027128 0.1672 999 0.703
## Residuals 33 0.053533 0.00162221
# Weighted Unifrac
beta_bray_n <- betadisper(scaled_wUnifrac_dist, metadata$n_level)
permutest(beta_bray_n)##
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
##
## Response: Distances
## Df Sum Sq Mean Sq F N.Perm Pr(>F)
## Groups 2 0.0008306 0.00041531 0.9309 999 0.395
## Residuals 32 0.0142763 0.00044613
beta_bray_planted <- betadisper(scaled_wUnifrac_dist, metadata$planted)
permutest(beta_bray_planted)##
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
##
## Response: Distances
## Df Sum Sq Mean Sq F N.Perm Pr(>F)
## Groups 1 0.0000012 0.00000121 0.0022 999 0.963
## Residuals 33 0.0178210 0.00054003
Variance of Sorensen influenced by nitrogen level.
# Set the phylum colors
phylum_colors <- c(
Acidobacteriota = "navy",
Actinobacteria = "purple",
Gemmatimonadota = "orange",
Patescibacteria = "lightpink",
Firmacutes = "darkslategray2",
Armatimonadota = "deeppink1",
Alphaproteobacteria = "plum2",
Bacteroidota = "gold",
Betaproteobacteria = "plum1",
Bdellovibrionota = "red1",
Chloroflexi="black",
Crenarchaeota = "firebrick",
Cyanobacteria = "limegreen",
Deltaproteobacteria = "grey",
Desulfobacterota="magenta",
Firmicutes = "#3E9B96",
Gammaproteobacteria = "greenyellow",
"Marinimicrobia (SAR406 clade)" = "yellow",
Myxococcota = "#B5D6AA",
Nitrospirota = "palevioletred1",
Proteobacteria = "royalblue",
Planctomycetota = "darkorange",
"SAR324 clade(Marine group B)" = "olivedrab",
"WPS-2"= "greenyellow",
Thermoplasmatota = "green",
Verrucomicrobiota = "darkorchid1")
# Other = "grey")# Calculate the phylum relative abundance
# Note: The read depth MUST be normalized in some way: scale_reads
phylum_df <-
scaled_rooted_physeq %>%
# agglomerate at the phylum level
tax_glom(taxrank = "Phylum") %>%
# Transform counts to relative abundance
transform_sample_counts(function (x) {x/sum(x)}) %>%
# Melt to a long format
psmelt() %>%
# Filter out Phyla that are <1% - get rid of low abundant Phyla
dplyr::filter(Abundance > 0.01)phylum_df %>%
# Warning: It's important to have one sample per x value,
# otherwise, it will take the sum between multiple samples
ggplot(aes(x = planted, y = Abundance, fill = Phylum)) +
facet_grid(Phylum~growth_stage~n_level, scale = "free") +
geom_bar(stat = "identity", color = "black") +
labs(title = "Surface Phylum Composition") +
scale_fill_manual(values = phylum_colors) +
theme_bw() +
theme(axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1),
strip.text.y = element_text(size = 5, angle = 90))# Narrow in on a specific group
# Firmacutes - y: abundance, x: station, dot plot + boxplot
phylum_df %>%
dplyr::filter(Phylum == "Firmicutes") %>%
# build the plot
ggplot(aes(x = planted, y = Abundance,
fill = planted, color = planted)) +
facet_wrap(.~Phylum, scales = "free_y", nrow = 1) +
geom_boxplot(alpha = 0.5, outlier.shape = NA) + # outliers not plotted here in boxplot
geom_jitter() +
theme_bw() +
labs(title = "Firmicutes Phylum Abundance", ylab = "Planted") +
scale_color_manual(values = planted_colors) +
scale_fill_manual(values = planted_colors) +
theme(axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1),
legend.position = "right")# Calculate the Family relative abundance
# Note: The read depth MUST be normalized in some way: scale_reads
family_df <-
scaled_rooted_physeq %>%
# agglomerate at the phylum level
tax_glom(taxrank = "Family") %>%
# Transform counts to relative abundance
transform_sample_counts(function (x) {x/sum(x)}) %>%
# Melt to a long format
psmelt() %>%
# Filter out Phyla that are <1% - get rid of low abundant Phyla
dplyr::filter(Abundance > 0.01)
# Check family_df
#str(family_df)
family_df %>%
dplyr::filter(Phylum == "Firmicutes") %>%
# build the plot
ggplot(aes(x = planted, y = Abundance,
fill = planted, color = planted)) +
facet_wrap(.~Family, scales = "free_y", nrow = 1) +
geom_boxplot(alpha = 0.5, outlier.shape = NA) + # outliers not plotted here in boxplot
geom_jitter() +
theme_bw() +
labs(title = "Firmicutes Family Abundance", ylab = "Planted") +
scale_color_manual(values = planted_colors) +
scale_fill_manual(values = planted_colors) +
theme(axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1),
legend.position = "right")#Caulobacteraceae - keystone taxa identified by Cui et al.for sampling on day 30.
family_df %>%
dplyr::filter(Family == "Caulobacteraceae") %>%
# build the plot
ggplot(aes(x = planted, y = Abundance,
fill = planted, color = planted)) +
facet_wrap(.~Family, scales = "free_y", nrow = 1) +
geom_boxplot(alpha = 0.5, outlier.shape = NA) + # outliers not plotted here in boxplot
geom_jitter() +
theme_bw() +
labs(title = "Caulobacteraceae Family Abundance Planted vs. Unplanted", ylab = "Planted") +
scale_color_manual(values = planted_colors) +
scale_fill_manual(values = planted_colors) +
theme(axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1),
legend.position = "right")family_df %>%
dplyr::filter(Family == "Caulobacteraceae") %>%
# build the plot
ggplot(aes(x = growth_stage, y = Abundance,
fill = as.factor(growth_stage), color = as.factor(growth_stage))) +
facet_wrap(.~Family, scales = "free_y", nrow = 1) +
geom_boxplot(alpha = 0.5, outlier.shape = NA) + # outliers not plotted here in boxplot
geom_jitter() +
theme_bw() +
labs(title = "Caulobacteraceae Family Abundance", xlab = ("Growth Stage")) +
scale_color_manual(values = growth_stage_colors) +
scale_fill_manual(values = growth_stage_colors) +
theme(axis.text.x = element_text(),
legend.position = "right", legend.title=element_blank())#Chitinophagaceae - keystone taxa identified by Cui et al.for sampling on day 30.
family_df %>%
dplyr::filter(Family == "Chitinophagaceae") %>%
# build the plot
ggplot(aes(x = growth_stage, y = Abundance,
fill = as.factor(growth_stage), color = as.factor(growth_stage))) +
facet_wrap(.~Family, scales = "free_y", nrow = 1) +
geom_boxplot(alpha = 0.5, outlier.shape = NA) + # outliers not plotted here in boxplot
geom_jitter() +
theme_bw() +
labs(title = "Chitinophagaceae Family Abundance", xlab = ("Growth Stage")) +
scale_color_manual(values = growth_stage_colors) +
scale_fill_manual(values = growth_stage_colors) +
theme(axis.text.x = element_text(),
legend.position = "right", legend.title=element_blank())family_df %>%
dplyr::filter(Family == "Chitinophagaceae") %>%
# build the plot
ggplot(aes(x = planted, y = Abundance,
fill = planted, color = planted)) +
facet_wrap(.~Family, scales = "free_y", nrow = 1) +
geom_boxplot(alpha = 0.5, outlier.shape = NA) + # outliers not plotted here in boxplot
geom_jitter() +
theme_bw() +
labs(title = "Chitinophagaceae Family Abundance Planted vs. Unplanted", xlab = ("Planted")) +
scale_color_manual(values = planted_colors) +
scale_fill_manual(values = planted_colors) +
theme(axis.text.x = element_text(),
legend.position = "right", legend.title=element_blank())# Xanthobacteraceae - keystone taxa identified by Cui et al.for sampling on day 75
family_df %>%
dplyr::filter(Family == "Xanthobacteraceae") %>%
# build the plot
ggplot(aes(x = growth_stage, y = Abundance,
fill = as.factor(growth_stage), color = as.factor(growth_stage))) +
facet_wrap(.~Family, scales = "free_y", nrow = 1) +
geom_boxplot(alpha = 0.5, outlier.shape = NA) + # outliers not plotted here in boxplot
geom_jitter() +
theme_bw() +
labs(title = "Xanthobacteraceae Family Abundance", xlab = ("Growth Stage")) +
scale_color_manual(values = growth_stage_colors) +
scale_fill_manual(values = growth_stage_colors) +
theme(axis.text.x = element_text(),
legend.position = "right", legend.title=element_blank())family_df %>%
dplyr::filter(Family == "Xanthobacteraceae") %>%
# build the plot
ggplot(aes(x = planted, y = Abundance,
fill = planted, color = planted)) +
facet_wrap(.~Family, scales = "free_y", nrow = 1) +
geom_boxplot(alpha = 0.5, outlier.shape = NA) + # outliers not plotted here in boxplot
geom_jitter() +
theme_bw() +
labs(title = "Xanthobacteraceae Family Abundance Planted vs. Unplanted", xlab = ("Planted")) +
scale_color_manual(values = planted_colors) +
scale_fill_manual(values = planted_colors) +
theme(axis.text.x = element_text(),
legend.position = "right", legend.title=element_blank())# Calculate the Family relative abundance
# Note: The read depth MUST be normalized in some way: scale_reads
genus_df <-
scaled_rooted_physeq %>%
# agglomerate at the phylum level
tax_glom(taxrank = "Genus") %>%
# Transform counts to relative abundance
transform_sample_counts(function (x) {x/sum(x)}) %>%
# Melt to a long format
psmelt() %>%
# Filter out Phyla that are <1% - get rid of low abundant Phyla
dplyr::filter(Abundance > 0.01)
genus_df %>%
dplyr::filter(Phylum == "Firmicutes") %>%
# build the plot
ggplot(aes(x = planted, y = Abundance,
fill = planted, color = planted)) +
facet_wrap(.~Family, scales = "free_y", nrow = 1) +
geom_boxplot(alpha = 0.5, outlier.shape = NA) + # outliers not plotted here in boxplot
geom_jitter() +
theme_bw() +
labs(title = "Firmicutes Genus Abundance", ylab = "Planted") +
scale_color_manual(values = planted_colors) +
scale_fill_manual(values = planted_colors) +
theme(axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1),
legend.position = "right")#Bacillus - keystone taxa identified by Cui et al.for sampling on day 30
genus_df %>%
dplyr::filter(Genus == "Bacillus") %>%
# build the plot
ggplot(aes(x = growth_stage, y = Abundance,
fill = as.factor(growth_stage), color = as.factor(growth_stage))) +
facet_wrap(.~Family, scales = "free_y", nrow = 1) +
geom_boxplot(alpha = 0.5, outlier.shape = NA) + # outliers not plotted here in boxplot
geom_jitter() +
theme_bw() +
labs(title = "Bacillus Genus Abundance", xlab = ("Growth Stage")) +
scale_color_manual(values = growth_stage_colors) +
scale_fill_manual(values = growth_stage_colors) +
theme(axis.text.x = element_text(),
legend.position = "bottom", legend.title=element_blank())genus_df %>%
dplyr::filter(Genus == "Bacillus") %>%
# build the plot
ggplot(aes(x = planted, y = Abundance,
fill = as.factor(planted), color = as.factor(planted))) +
facet_wrap(.~Family, scales = "free_y", nrow = 1) +
geom_boxplot(alpha = 0.5, outlier.shape = NA) + # outliers not plotted here in boxplot
geom_jitter() +
theme_bw() +
labs(title = "Bacillus Genus Abundance Planted vs. Unplanted", xlab = ("Planted")) +
scale_color_manual(values = planted_colors) +
scale_fill_manual(values = planted_colors) +
theme(axis.text.x = element_text(),
legend.position = "bottom", legend.title=element_blank())#Gemmatimonas - keystone taxa identified by Cui et al.for sampling on day 75
genus_df %>%
dplyr::filter(Genus == "Gemmatimonas") %>%
# build the plot
ggplot(aes(x = growth_stage, y = Abundance,
fill = as.factor(growth_stage), color = as.factor(growth_stage))) +
facet_wrap(.~Family, scales = "free_y", nrow = 1) +
geom_boxplot(alpha = 0.5, outlier.shape = NA) + # outliers not plotted here in boxplot
geom_jitter() +
theme_bw() +
labs(title = "Gemmatimonas Genus Abundance", xlab = ("Growth Stage")) +
scale_color_manual(values = growth_stage_colors) +
scale_fill_manual(values = growth_stage_colors) +
theme(axis.text.x = element_text(),
legend.position = "bottom", legend.title=element_blank())genus_df %>%
dplyr::filter(Genus == "Gemmatimonas") %>%
# build the plot
ggplot(aes(x = planted, y = Abundance,
fill = as.factor(planted), color = as.factor(planted))) +
facet_wrap(.~Family, scales = "free_y", nrow = 1) +
geom_boxplot(alpha = 0.5, outlier.shape = NA) + # outliers not plotted here in boxplot
geom_jitter() +
theme_bw() +
labs(title = "Gemmatimonas Genus Abundance Planted vs", xlab = ("Planted")) +
scale_color_manual(values = planted_colors) +
scale_fill_manual(values = planted_colors) +
theme(axis.text.x = element_text(),
legend.position = "bottom", legend.title=element_blank())#Candidatus Koribacter - keystone taxa identified by Cui et al.for sampling on day 75
genus_df %>%
dplyr::filter(Genus == "Candidatus Koribacter") %>%
# build the plot
ggplot(aes(x = growth_stage, y = Abundance,
fill = as.factor(growth_stage), color = as.factor(growth_stage))) +
facet_wrap(.~Family, scales = "free_y", nrow = 1) +
geom_boxplot(alpha = 0.5, outlier.shape = NA) + # outliers not plotted here in boxplot
geom_jitter() +
theme_bw() +
labs(title = "Candidatus Koribacter Genus Abundance", xlab = ("Growth Stage")) +
scale_color_manual(values = growth_stage_colors) +
scale_fill_manual(values = growth_stage_colors) +
theme(axis.text.x = element_text(),
legend.position = "bottom", legend.title=element_blank())genus_df %>%
dplyr::filter(Genus == "Candidatus Koribacter") %>%
# build the plot
ggplot(aes(x = planted, y = Abundance,
fill = as.factor(planted), color = as.factor(planted))) +
facet_wrap(.~Family, scales = "free_y", nrow = 1) +
geom_boxplot(alpha = 0.5, outlier.shape = NA) + # outliers not plotted here in boxplot
geom_jitter() +
theme_bw() +
labs(title = "Candidatus Koribacter Genus Abundance Planted vs", xlab = ("Planted")) +
scale_color_manual(values = planted_colors) +
scale_fill_manual(values = planted_colors) +
theme(axis.text.x = element_text(),
legend.position = "bottom", legend.title=element_blank())For reproducibility
## ─ Session info ───────────────────────────────────────────────────────────────
## setting value
## version R version 4.3.2 (2023-10-31)
## os Rocky Linux 9.0 (Blue Onyx)
## system x86_64, linux-gnu
## ui X11
## language (EN)
## collate en_US.UTF-8
## ctype en_US.UTF-8
## tz America/New_York
## date 2024-04-30
## pandoc 3.1.1 @ /usr/lib/rstudio-server/bin/quarto/bin/tools/ (via rmarkdown)
##
## ─ Packages ───────────────────────────────────────────────────────────────────
## package * version date (UTC) lib source
## ade4 1.7-22 2023-02-06 [1] CRAN (R 4.3.2)
## ape 5.7-1 2023-03-13 [2] CRAN (R 4.3.2)
## Biobase 2.62.0 2023-10-24 [2] Bioconductor
## BiocGenerics 0.48.1 2023-11-01 [2] Bioconductor
## biomformat 1.30.0 2023-10-24 [1] Bioconductor
## Biostrings 2.70.1 2023-10-25 [2] Bioconductor
## bitops 1.0-7 2021-04-24 [2] CRAN (R 4.3.2)
## bslib 0.5.1 2023-08-11 [2] CRAN (R 4.3.2)
## cachem 1.0.8 2023-05-01 [2] CRAN (R 4.3.2)
## callr 3.7.3 2022-11-02 [2] CRAN (R 4.3.2)
## cli 3.6.1 2023-03-23 [2] CRAN (R 4.3.2)
## cluster 2.1.4 2022-08-22 [2] CRAN (R 4.3.2)
## codetools 0.2-19 2023-02-01 [2] CRAN (R 4.3.2)
## colorspace 2.1-0 2023-01-23 [2] CRAN (R 4.3.2)
## crayon 1.5.2 2022-09-29 [2] CRAN (R 4.3.2)
## data.table 1.14.8 2023-02-17 [2] CRAN (R 4.3.2)
## devtools * 2.4.5 2022-10-11 [1] CRAN (R 4.3.2)
## digest 0.6.33 2023-07-07 [2] CRAN (R 4.3.2)
## dplyr * 1.1.3 2023-09-03 [2] CRAN (R 4.3.2)
## ellipsis 0.3.2 2021-04-29 [2] CRAN (R 4.3.2)
## evaluate 0.23 2023-11-01 [2] CRAN (R 4.3.2)
## fansi 1.0.5 2023-10-08 [2] CRAN (R 4.3.2)
## farver 2.1.1 2022-07-06 [2] CRAN (R 4.3.2)
## fastmap 1.1.1 2023-02-24 [2] CRAN (R 4.3.2)
## forcats * 1.0.0 2023-01-29 [1] CRAN (R 4.3.2)
## foreach 1.5.2 2022-02-02 [2] CRAN (R 4.3.2)
## fs 1.6.3 2023-07-20 [2] CRAN (R 4.3.2)
## generics 0.1.3 2022-07-05 [2] CRAN (R 4.3.2)
## GenomeInfoDb 1.38.0 2023-10-24 [2] Bioconductor
## GenomeInfoDbData 1.2.11 2023-11-07 [2] Bioconductor
## ggplot2 * 3.5.0 2024-02-23 [2] CRAN (R 4.3.2)
## glue 1.6.2 2022-02-24 [2] CRAN (R 4.3.2)
## gtable 0.3.4 2023-08-21 [2] CRAN (R 4.3.2)
## highr 0.10 2022-12-22 [2] CRAN (R 4.3.2)
## hms 1.1.3 2023-03-21 [1] CRAN (R 4.3.2)
## htmltools 0.5.7 2023-11-03 [2] CRAN (R 4.3.2)
## htmlwidgets 1.6.2 2023-03-17 [2] CRAN (R 4.3.2)
## httpuv 1.6.12 2023-10-23 [2] CRAN (R 4.3.2)
## igraph 1.5.1 2023-08-10 [2] CRAN (R 4.3.2)
## IRanges 2.36.0 2023-10-24 [2] Bioconductor
## iterators 1.0.14 2022-02-05 [2] CRAN (R 4.3.2)
## jquerylib 0.1.4 2021-04-26 [2] CRAN (R 4.3.2)
## jsonlite 1.8.7 2023-06-29 [2] CRAN (R 4.3.2)
## knitr 1.45 2023-10-30 [2] CRAN (R 4.3.2)
## labeling 0.4.3 2023-08-29 [2] CRAN (R 4.3.2)
## later 1.3.1 2023-05-02 [2] CRAN (R 4.3.2)
## lattice * 0.21-9 2023-10-01 [2] CRAN (R 4.3.2)
## lifecycle 1.0.3 2022-10-07 [2] CRAN (R 4.3.2)
## lubridate * 1.9.3 2023-09-27 [1] CRAN (R 4.3.2)
## magrittr 2.0.3 2022-03-30 [2] CRAN (R 4.3.2)
## MASS 7.3-60 2023-05-04 [2] CRAN (R 4.3.2)
## Matrix 1.6-1.1 2023-09-18 [2] CRAN (R 4.3.2)
## memoise 2.0.1 2021-11-26 [2] CRAN (R 4.3.2)
## mgcv 1.9-0 2023-07-11 [2] CRAN (R 4.3.2)
## mime 0.12 2021-09-28 [2] CRAN (R 4.3.2)
## miniUI 0.1.1.1 2018-05-18 [2] CRAN (R 4.3.2)
## multtest 2.58.0 2023-10-24 [1] Bioconductor
## munsell 0.5.0 2018-06-12 [2] CRAN (R 4.3.2)
## nlme 3.1-163 2023-08-09 [2] CRAN (R 4.3.2)
## pacman 0.5.1 2019-03-11 [1] CRAN (R 4.3.2)
## patchwork * 1.2.0 2024-01-08 [1] CRAN (R 4.3.2)
## permute * 0.9-7 2022-01-27 [1] CRAN (R 4.3.2)
## phyloseq * 1.41.1 2024-03-13 [1] Github (joey711/phyloseq@c260561)
## pillar 1.9.0 2023-03-22 [2] CRAN (R 4.3.2)
## pkgbuild 1.4.2 2023-06-26 [2] CRAN (R 4.3.2)
## pkgconfig 2.0.3 2019-09-22 [2] CRAN (R 4.3.2)
## pkgload 1.3.3 2023-09-22 [2] CRAN (R 4.3.2)
## plyr 1.8.9 2023-10-02 [2] CRAN (R 4.3.2)
## prettyunits 1.2.0 2023-09-24 [2] CRAN (R 4.3.2)
## processx 3.8.2 2023-06-30 [2] CRAN (R 4.3.2)
## profvis 0.3.8 2023-05-02 [2] CRAN (R 4.3.2)
## promises 1.2.1 2023-08-10 [2] CRAN (R 4.3.2)
## ps 1.7.5 2023-04-18 [2] CRAN (R 4.3.2)
## purrr * 1.0.2 2023-08-10 [2] CRAN (R 4.3.2)
## R6 2.5.1 2021-08-19 [2] CRAN (R 4.3.2)
## Rcpp 1.0.11 2023-07-06 [2] CRAN (R 4.3.2)
## RCurl 1.98-1.13 2023-11-02 [2] CRAN (R 4.3.2)
## readr * 2.1.5 2024-01-10 [1] CRAN (R 4.3.2)
## remotes 2.4.2.1 2023-07-18 [2] CRAN (R 4.3.2)
## reshape2 1.4.4 2020-04-09 [2] CRAN (R 4.3.2)
## rhdf5 2.46.1 2023-11-29 [1] Bioconductor 3.18 (R 4.3.2)
## rhdf5filters 1.14.1 2023-11-06 [1] Bioconductor
## Rhdf5lib 1.24.2 2024-02-07 [1] Bioconductor 3.18 (R 4.3.2)
## rlang 1.1.2 2023-11-04 [2] CRAN (R 4.3.2)
## rmarkdown 2.25 2023-09-18 [2] CRAN (R 4.3.2)
## rstudioapi 0.15.0 2023-07-07 [2] CRAN (R 4.3.2)
## S4Vectors 0.40.1 2023-10-26 [2] Bioconductor
## sass 0.4.7 2023-07-15 [2] CRAN (R 4.3.2)
## scales 1.3.0 2023-11-28 [2] CRAN (R 4.3.2)
## sessioninfo 1.2.2 2021-12-06 [2] CRAN (R 4.3.2)
## shiny 1.7.5.1 2023-10-14 [2] CRAN (R 4.3.2)
## stringi 1.7.12 2023-01-11 [2] CRAN (R 4.3.2)
## stringr * 1.5.0 2022-12-02 [2] CRAN (R 4.3.2)
## survival 3.5-7 2023-08-14 [2] CRAN (R 4.3.2)
## tibble * 3.2.1 2023-03-20 [2] CRAN (R 4.3.2)
## tidyr * 1.3.0 2023-01-24 [2] CRAN (R 4.3.2)
## tidyselect 1.2.0 2022-10-10 [2] CRAN (R 4.3.2)
## tidyverse * 2.0.0 2023-02-22 [1] CRAN (R 4.3.2)
## timechange 0.3.0 2024-01-18 [1] CRAN (R 4.3.2)
## tzdb 0.4.0 2023-05-12 [1] CRAN (R 4.3.2)
## urlchecker 1.0.1 2021-11-30 [2] CRAN (R 4.3.2)
## usethis * 2.2.2 2023-07-06 [2] CRAN (R 4.3.2)
## utf8 1.2.4 2023-10-22 [2] CRAN (R 4.3.2)
## vctrs 0.6.4 2023-10-12 [2] CRAN (R 4.3.2)
## vegan * 2.6-4 2022-10-11 [1] CRAN (R 4.3.2)
## withr 2.5.2 2023-10-30 [2] CRAN (R 4.3.2)
## xfun 0.41 2023-11-01 [2] CRAN (R 4.3.2)
## xtable 1.8-4 2019-04-21 [2] CRAN (R 4.3.2)
## XVector 0.42.0 2023-10-24 [2] Bioconductor
## yaml 2.3.7 2023-01-23 [2] CRAN (R 4.3.2)
## zlibbioc 1.48.0 2023-10-24 [2] Bioconductor
##
## [1] /home/alm379/R/x86_64-pc-linux-gnu-library/4.3
## [2] /programs/R-4.3.2/library
##
## ──────────────────────────────────────────────────────────────────────────────